library(dplyr)
library(leaflet)
library(geosphere)
library(data.table)
library(ggplot2)
library(lubridate)
library(caret)
library(readxl)
library(tidyverse)
library(ggthemes)
library(ggrepel)
library(reshape)
library(reshape2)
library(devtools)

Data preparation

We used all of the bluebike data in 2017

Read the bluebike data in 2017

# Importing the all monthly data in 2017
dat_201701<-read.csv("201701-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201702<-read.csv("201702-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201703<-read.csv("201703-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201704<-read.csv("201704-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201705<-read.csv("201705-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201706<-read.csv("201706-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201707<-read.csv("201707-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201708<-read.csv("201708-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201709<-read.csv("201709-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201710<-read.csv("201710-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201711<-read.csv("201711-hubway-tripdata.csv",stringsAsFactors = FALSE)
dat_201712<-read.csv("201712-hubway-tripdata.csv",stringsAsFactors = FALSE)

# Combine them
dat_2017 <- rbind(dat_201701, dat_201702, dat_201703, dat_201704, dat_201705,
                  dat_201706, dat_201707, dat_201708, dat_201709, dat_201710,
                  dat_201711, dat_201712)

Add variables

## age, age_cat, duration_min, year, month, month_abb, day, hour, wday, weekend
bbike <- dat_2017 %>%
  mutate(birth.year = as.numeric(birth.year)) %>%
        mutate(age = 2017 - birth.year) %>%
  mutate(age_cat = case_when(
    .$age >= 10 & .$age < 20 ~ 1,
    .$age >= 20 & .$age < 30 ~ 2,
    .$age >= 30 & .$age < 40 ~ 3,
    .$age >= 40 & .$age < 50 ~ 4,
    .$age >= 50 & .$age < 60 ~ 5,
    .$age >= 60 & .$age < 70 ~ 6,
    .$age >= 70 & .$age < 80 ~ 7,
    .$age >= 80 ~ 8)) %>%
  mutate(duration_min = tripduration / 60) %>%
  mutate(year = year(starttime), 
         month = month(starttime),
         month_abb = month(starttime, label = TRUE, abbr = TRUE), 
         day = day(starttime),
         hour = hour(starttime), 
         wday = wday(starttime, label = TRUE, abbr = TRUE))
## Warning in evalq(as.numeric(birth.year), <environment>): NAs introduced by
## coercion
## Trip distance (km)
setDT(bbike)[ , dist_km := distGeo(matrix(c(start.station.longitude, start.station.latitude), ncol = 2),matrix(c(end.station.longitude, end.station.latitude), ncol = 2))/1000]
bbike <- as.data.frame(bbike)
## overtime (if duration_min >  45, 1, 0)
bbike <- bbike %>%
  mutate(overtime = ifelse(duration_min > 45, 1, 0))
## user_start, user_end: number of users at the start/end station
bbike <- bbike %>%
  group_by(start.station.id) %>%
  mutate(user_start = n())

bbike <- bbike %>%
  group_by(end.station.id) %>%
  mutate(user_end = n())

Combine weather information

## temp_max, temp_min, rain, snownice(snow or ice)
weather <- read_excel("boston_weather.xls")
bbike <- bbike %>%
  group_by(year, month, day) %>%
  left_join(., weather, by = c("year", "month", "day")) 

Combine crash information daily

crash <- read.csv("crash_sept.csv", stringsAsFactors = FALSE)
crash <- crash %>%
  mutate(year = year(dispatch_ts), month = month(dispatch_ts), day = day(dispatch_ts))
temp <- crash %>%
  filter(year == 2017) %>%
  group_by(year, month, day, mode_type) %>%
  summarise(crash = n())
crash_wide <- temp %>%
  spread(mode_type, crash)
bbike_crash <- left_join(bbike, crash_wide, by = c("year", "month", "day"))

Motivation

The number of bluelike users has increased over several years. Among subscribers, if they pay $99 per year, they can use it unlimitedly. However, there is a time limit, for 45 minutes per once. We are going to figure out when they do not return their bike within 45 minutes and predict the pattern.

Membership and Ridership More than 8 million trips have been taken by Bluebikes riders since the 2011 launch (as of 12/2018) An estimated 87,000 unique riders took trips in 2016

bbike_summary <- read_excel("bluebike_summary.xlsx")
plot <- melt(bbike_summary, id.vars = "year") %>%
  filter(variable == "subscriber" | variable == "customer") 
plot %>% 
  ggplot(aes(year, value)) +
  geom_bar(aes(fill = variable), stat = "identity", position = "stack") +
  scale_fill_manual(values = c("#FF8F1C", "#0050B5")) +
  scale_x_continuous(breaks = c(2011, 2012, 2013, 2014, 2015, 2016, 2017)) +
  xlab("Year") +
  ylab("Riders") +
  ggtitle("Number of Riders since 2011") +
  theme_bw()

bbike_summary %>%
  ggplot(aes(year, total_trips)) +
  geom_point(color = "#FF8F1C", size = 2) +
  geom_line(color = "#1D428A", alpha = 0.7) +
  xlab("Year") +
  ylab("Numbers") +
  ggtitle("Total Number of Trips since 2011") +
  theme_bw()

bbike %>%
        mutate(gender = as.factor(gender)) %>%
        ggplot(aes(age_cat)) +
        geom_bar(aes(fill = gender)) +
        scale_fill_brewer( palette = "Oranges")
## Warning in mutate_impl(.data, dots): Unequal factor levels: coercing to
## character
## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector

## Warning in mutate_impl(.data, dots): binding character and factor vector,
## coercing into character vector
## Warning: Removed 210947 rows containing non-finite values (stat_count).

Blue bike Stations

# Create data frame
dat_station<- read.csv("Hubway_Stations_as_of_July_2017.csv")
dat_2017_station <- bbike %>%
filter(!birth.year=="\\N")%>%
                        filter(birth.year>1900 & birth.year<=2017)%>%
  group_by(start.station.id) %>% summarize(number = n(), start.station.latitude=first(start.station.latitude), start.station.longitude=first(start.station.longitude),start.station.name=first(start.station.name))%>% filter(start.station.latitude>0)
summary(dat_2017_station)
##  start.station.id     number      start.station.latitude
##  Min.   :  1.00   Min.   :    4   Min.   :42.30         
##  1st Qu.: 54.75   1st Qu.: 1794   1st Qu.:42.34         
##  Median :108.50   Median : 4750   Median :42.36         
##  Mean   :111.91   Mean   : 5625   Mean   :42.36         
##  3rd Qu.:173.25   3rd Qu.: 7614   3rd Qu.:42.37         
##  Max.   :232.00   Max.   :35702   Max.   :42.41         
##  start.station.longitude start.station.name
##  Min.   :-71.17          Length:196        
##  1st Qu.:-71.11          Class :character  
##  Median :-71.08          Mode  :character  
##  Mean   :-71.09                            
##  3rd Qu.:-71.06                            
##  Max.   :-71.01

Map about number of users at each station

# Distinguish stations by color based on the number of users
getColor <- function(df){
  sapply(df$number, function(number) {
  if(number %in% 1:4000) {
    "green"
  } else if(number %in% 4001:10000) {
    "orange"
  } else if(number >10000) {
    "red"
  } else {
    "blue"
  } })
}

icons <- awesomeIcons(
  icon = 'ios-close',
  iconColor = 'black',
  library = 'ion',
  markerColor = getColor(dat_2017_station)
)

leaflet(dat_2017_station) %>% addTiles() %>%
  addAwesomeMarkers(~start.station.longitude
, ~start.station.latitude, icon=icons, label=~as.character(number), popup = ~start.station.name)

Distribution of Trip Duration

bbike_member <- bbike %>%
  filter(usertype == "Subscriber")
summary(bbike_member$duration_min)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1.02     6.10     9.78    13.31    15.48 61276.80

As you can see, the data seems to have wrong information. The very long tripduration might be attributable to lost or other errors. Therefore, we limit the range from 0 to 75 minutes for the duration in this study.

bbike_member <- bbike_member %>%
  filter(duration_min < 50)
bbike_member %>%
  mutate(group = ifelse(rain == 0, "no rain", "rain")) %>%
  ggplot(aes(duration_min, y = ..count.., fill = group)) +
  geom_density(alpha = 0.2) +
  xlab("Trip Duration (min)") +
  ylab("Riders")

bbike_member %>%
  mutate(gender = as.factor(gender)) %>%
  ggplot(aes(duration_min, y = ..count.., fill = gender)) +
  geom_density(alpha = 0.2) +
  xlab("Trip Duration (min)") +
  ylab("Riders")

Trip distance

# Distance
bbike %>% 
        filter(birth.year > 1900 & birth.year <= 2017)%>%
        group_by(age_cat) %>% 
        summarize(avg = mean(dist_km), se = sd(dist_km) / sqrt(n())) %>% 
  ggplot(aes(age_cat, avg))+ 
  geom_boxplot(aes(ymin = avg - 2*se, ymax = avg+2 * se))+
  geom_point(color = "#FF8F1C")+
  geom_line(color = "#1D428A")+
  scale_x_continuous(breaks=(c(1,2,3,4,5,6,7,8)), labels=c("10-20","20-30","30-40","40-50","50-60","60-70","70-80","80-"))+
  xlab(expression(paste(Age, " (years)")))+
  ylab(expression(paste(Distance," (km)"))) +
        theme_bw()
## Warning: Continuous x aesthetic -- did you forget aes(group=...)?

There is no consistent trend between age and trip distance.

Machine Learning - using logistic regression to predict overtime users characteristics

Rationale: With an annual membership, users can ride unlimited bike ride for up to 45 mins. Additional $2.5 will be charged per 30 mins for members. In this analysis, we are interested in learning about the characteristics of overtime riders so that Bluebike can gain extra revenue.

Only keep memebers in the dataset

bbikemember <- bbike %>%
        ungroup() %>%
  filter(usertype == "Subscriber" & duration_min < 50) %>%
        mutate(gender = as.factor(gender), day = as.factor(day),
               hour = as.factor(hour)) %>%
        mutate(rain_cat = ifelse(rain == 0, 0, 1)) %>%
        mutate(snownice_cat = ifelse(snownice == 0, 0, 1)) %>%
        mutate(overtime = ifelse(duration_min < 15, 0, 1)) %>%
        mutate(satsun = ifelse(wday %in% c("Sat", "Sun"), 1, 0))

logistic regression

set.seed(1)

library(caret)

Train <- createDataPartition(bbikemember$overtime, p=0.5, list=FALSE)
training <- bbikemember[ Train, ]
testing <- bbikemember[ -Train, ]

bbikemember$overtime = as.factor(as.numeric(as.character(bbikemember $overtime)))

#overall accuracy 
p = 0.358  #290235/810623
y_hat <- sample(c("0","1"), length(testing), replace = TRUE, prob=c(p, 1-p)) %>% 
  factor(levels = levels(bbikemember$overtime))
mean(y_hat == testing$overtime)
## Warning in `==.default`(y_hat, testing$overtime): longer object length is
## not a multiple of shorter object length
## Warning in is.na(e1) | is.na(e2): longer object length is not a multiple of
## shorter object length
## [1] 0.3973021
#logistic regression
glm.fit <- bbikemember %>% 
  glm(overtime ~ gender + age + month + satsun + rain_cat +
               snownice_cat + user_start + user_end, data=., family = "binomial")

summary(glm.fit)
## 
## Call:
## glm(formula = overtime ~ gender + age + month + satsun + rain_cat + 
##     snownice_cat + user_start + user_end, family = "binomial", 
##     data = .)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4899  -0.8238  -0.7000   1.3422   2.7176  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.801e-03  3.240e-02  -0.179    0.858    
## gender1      -7.352e-01  3.055e-02 -24.061  < 2e-16 ***
## gender2      -4.139e-01  3.074e-02 -13.466  < 2e-16 ***
## age           6.726e-03  1.918e-04  35.063  < 2e-16 ***
## month         6.172e-03  8.827e-04   6.992  2.7e-12 ***
## satsun        2.279e-01  5.587e-03  40.793  < 2e-16 ***
## rain_cat     -8.377e-02  4.849e-03 -17.276  < 2e-16 ***
## snownice_cat -5.960e-01  2.921e-02 -20.408  < 2e-16 ***
## user_start   -2.905e-05  2.967e-07 -97.903  < 2e-16 ***
## user_end     -2.728e-05  2.760e-07 -98.837  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1256483  on 1094867  degrees of freedom
## Residual deviance: 1220356  on 1094858  degrees of freedom
##   (1855 observations deleted due to missingness)
## AIC: 1220376
## 
## Number of Fisher Scoring iterations: 4
#wday.L = Sunday wday.Q = Mondaywday.C = Tuesdaywday^4 = Wednesday wday^5 = Thursday wday^6 = Friday Intercept = Saturday 

#prediction
p_hat <- predict(glm.fit, newdata=testing,type="response")
y_hat <- ifelse(p_hat > 0.5, 1, 0) %>% factor()

#confusion matrix 
table(predicted = y_hat, actual = testing$overtime)
##          actual
## predicted      0      1
##         0 404191 142443
##         1    441    353
confusionMatrix(data = factor(y_hat), reference = factor(testing$overtime))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction      0      1
##          0 404191 142443
##          1    441    353
##                                           
##                Accuracy : 0.739           
##                  95% CI : (0.7378, 0.7402)
##     No Information Rate : 0.7392          
##     P-Value [Acc > NIR] : 0.6074          
##                                           
##                   Kappa : 0.002           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.998910        
##             Specificity : 0.002472        
##          Pos Pred Value : 0.739418        
##          Neg Pred Value : 0.444584        
##              Prevalence : 0.739151        
##          Detection Rate : 0.738345        
##    Detection Prevalence : 0.998550        
##       Balanced Accuracy : 0.500691        
##                                           
##        'Positive' Class : 0               
## 

ROC

library(purrr)
library(caret)
library(ggplot2)

probs <- seq(0, 1, length.out = 10)
guessing <- map_df(probs, function(p){
  y_hat <- 
    sample(c("0", "1"), length(testing), replace = TRUE, prob=c(p, 1-p)) %>%
    factor(levels = levels(bbikemember$overtime))
  list(method = "Guessing",
       FPR = 1 - specificity(y_hat, bbikemember$overtime),
       TPR = sensitivity(y_hat, bbikemember$overtime))
})
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% negative & reference %in% negative: longer object
## length is not a multiple of shorter object length
## Warning in complete.cases(data) & complete.cases(reference): longer object
## length is not a multiple of shorter object length
## Warning in data %in% positive & reference %in% positive: longer object
## length is not a multiple of shorter object length
guessing %>% qplot(FPR, TPR, data =., xlab = "1 - Specificity", ylab = "Sensitivity")

Decision tree

library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
png(file = "decision_tree.png")

output_tree <- ctree(overtime ~ gender + age + factor(rain_cat),
                     data = bbikemember)
plot(output_tree)
dev.off()
## quartz_off_screen 
##                 2